import numpy as np
import pandas as pd

# Relative paths to the raw Excel workbooks.
brand_file_path = '../../data/raw data/餐饮连锁品牌数据.xlsx'
cater_file_path = '../../data/raw data/餐饮连锁数据.xlsx'

# Candidate worksheet names; the entry at index 1 is the one loaded below.
sheet_names = ['门店信息', '菜品信息', '营销记录', '顾客评价']
sheet_name = sheet_names[1]

# Load the selected worksheet of the catering workbook into a DataFrame.
df_dish = pd.read_excel(cater_file_path, sheet_name=sheet_name)

# ======================================
# Step 3: detect and handle duplicate rows
# ======================================
# Boolean mask flagging every row that repeats an earlier row; computed once
# and reused for both the count and the preview.
dup_mask = df_dish.duplicated()
dup_count = dup_mask.sum()

print("\n【重复值检测】")
print(dup_count)
print('================================')

# --- Check 2: show the duplicate rows, if any ---
if dup_count > 0:
    print(df_dish[dup_mask])

# Drop the duplicate rows in place (the first occurrence of each row is kept).
df_dish.drop_duplicates(inplace=True)

# --- Verification: recount duplicates after the drop ---
print("\n【重复值处理后验证】")
print(df_dish.duplicated().sum())